In [ ]:

    
%matplotlib nbagg
import matplotlib.pyplot as plt
import numpy as np



In [ ]:

    
from sklearn.datasets import load_digits
# sklearn.cross_validation was deprecated and later removed from scikit-learn;
# train_test_split is provided by sklearn.model_selection.
from sklearn.model_selection import train_test_split
import numpy as np
# Print small floats in fixed-point form instead of scientific notation.
np.set_printoptions(suppress=True)

digits = load_digits()
X, y = digits.data, digits.target
# Fixed seed so that the train/test split is identical on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

Removing mean and scaling variance



In [ ]:

    
from sklearn.preprocessing import StandardScaler

1) Instantiate the model



In [ ]:

    
# Create the scaler with default settings; it is fitted in a later cell.
scaler = StandardScaler()

2) Fit using only the data (no labels are passed).



In [ ]:

    
# Fit on the training split only; no target labels are passed.
scaler.fit(X_train)

3) Transform the data (a scaler transforms; it does not predict).



In [ ]:

    
# Apply the fitted scaler to the training data and keep the result under a new name.
X_train_scaled = scaler.transform(X_train)



In [ ]:

    
# Shape of the unscaled training data, for comparison with the scaled array.
X_train.shape



In [ ]:

    
# Shape of the scaled training data, for comparison with X_train.shape.
X_train_scaled.shape

The transformed version of the data has the mean removed:



In [ ]:

    
# Column-wise (axis=0) mean of the scaled training data.
X_train_scaled.mean(axis=0)



In [ ]:

    
# Column-wise (axis=0) standard deviation of the scaled training data.
X_train_scaled.std(axis=0)



In [ ]:

    
# Reuse the scaler that was fitted on X_train; it is only applied to X_test, not refit.
X_test_transformed = scaler.transform(X_test)

Principal Component Analysis

0) Import the model



In [ ]:

    
from sklearn.decomposition import PCA

1) Instantiate the model



In [ ]:

    
# Keep two components so the projection can be drawn as a 2-D scatter plot.
pca = PCA(n_components=2)

2) Fit to the data (here the full dataset X, not just the training split)



In [ ]:

    
# Note: this fits on the full dataset X, not on X_train.
pca.fit(X)

3) Transform to lower-dimensional representation



In [ ]:

    
# Print the original shape, project X with the fitted PCA, then display the
# shape of the projection for comparison.
print(X.shape)
X_pca = pca.transform(X)
X_pca.shape

Visualize



In [ ]:

    
# Open a new figure first: the nbagg backend keeps figures open between cells,
# so without this the points would be added to whichever figure is still active.
plt.figure()
# Columns 0 and 1 of X_pca are the x/y coordinates; colour encodes the target y.
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)

Manifold Learning



In [ ]:

    
# Isomap is created with its default settings.
from sklearn.manifold import Isomap
isomap = Isomap()



In [ ]:

    
# Fit Isomap on the full dataset X and compute the embedding in a single call.
X_isomap = isomap.fit_transform(X)



In [ ]:

    
# Open a new figure first: the nbagg backend keeps figures open between cells,
# so without this the Isomap points would be drawn onto the earlier PCA figure.
plt.figure()
# Columns 0 and 1 of X_isomap are the x/y coordinates; colour encodes the target y.
plt.scatter(X_isomap[:, 0], X_isomap[:, 1], c=y)

Exercises

Visualize the digits dataset using the TSNE algorithm from the sklearn.manifold module (it runs for a couple of seconds).



In [ ]:

    
# %load solutions/digits_tsne.py